{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Named Entity Recognition pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Name of the Kubeflow Pipelines experiment that runs are grouped under.\n",
    "EXPERIMENT_NAME = \"named-entity-recognition\"\n",
    "\n",
    "# GCS bucket used for the component definitions and the pipeline's\n",
    "# artifacts; replace the placeholder with your own bucket name.\n",
    "BUCKET = \"your-bucket-name\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "import kfp\n",
    "from kfp import compiler\n",
    "import kfp.components as comp\n",
    "import kfp.dsl as dsl\n",
    "from kfp import gcp"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load components"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Help on function preprocess:\n",
      "\n",
      "preprocess(input_1_uri:'GCSPath', output_x_uri_template:'GCSPath', output_y_uri_template:'GCSPath', output_preprocessing_state_uri_template:'GCSPath')\n",
      "    Performs the IOB preprocessing.\n",
      "\n",
      "Help on function train:\n",
      "\n",
      "train(input_x_uri:'GCSPath', input_y_uri:'GCSPath', input_job_dir_uri:'GCSPath', input_tags:'Integer', input_words:'Integer', input_dropout, output_model_uri_template:'GCSPath')\n",
      "    Trains the NER Bi-LSTM.\n",
      "\n",
      "Help on function deploy:\n",
      "\n",
      "deploy(model_path:'GCSPath', model_name:'String', model_region:'String', model_version:'String', model_runtime_version:'String', model_prediction_class:'String', model_python_version:'String', model_package_uris:'String')\n",
      "    Deploy the model with custom prediction route\n",
      "\n"
     ]
    }
   ],
   "source": [
    "def _load_bucket_component(component_name):\n",
    "    \"\"\"Load one component definition from the project bucket.\n",
    "\n",
    "    Builds the URL of ``components/<component_name>/component.yaml`` inside\n",
    "    ``BUCKET`` on storage.googleapis.com and hands it to\n",
    "    ``comp.load_component_from_url``.\n",
    "\n",
    "    Args:\n",
    "        component_name: Directory name of the component in the bucket,\n",
    "            e.g. 'preprocess'.\n",
    "\n",
    "    Returns:\n",
    "        Whatever ``comp.load_component_from_url`` returns for that URL.\n",
    "    \"\"\"\n",
    "    component_url = 'https://storage.googleapis.com/{}/components/{}/component.yaml'.format(\n",
    "        BUCKET, component_name)\n",
    "    return comp.load_component_from_url(component_url)\n",
    "\n",
    "\n",
    "# help() prints each loaded operation's signature for reference.\n",
    "preprocess_operation = _load_bucket_component('preprocess')\n",
    "help(preprocess_operation)\n",
    "\n",
    "train_operation = _load_bucket_component('train')\n",
    "help(train_operation)\n",
    "\n",
    "ai_platform_deploy_operation = _load_bucket_component('deploy')\n",
    "help(ai_platform_deploy_operation)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build the Pipeline "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "@dsl.pipeline(\n",
    "  name='Named Entity Recognition Pipeline',\n",
    "  description='Performs preprocessing, training and deployment.'\n",
    ")\n",
    "def pipeline():\n",
    "    \"\"\"Preprocess the NER dataset, train the Bi-LSTM and deploy the model.\n",
    "\n",
    "    The three steps are chained through their outputs: training consumes\n",
    "    the preprocessing outputs, and deployment consumes the training output.\n",
    "    Each step has the 'user-gcp-sa' secret applied via\n",
    "    kfp.gcp.use_gcp_secret.\n",
    "    \"\"\"\n",
    "    # '{{workflow.uid}}' is the Argo placeholder for the run's unique id.\n",
    "    # The prefix is built by concatenation rather than str.format(), because\n",
    "    # format() collapses '{{' / '}}' to single braces and the placeholder\n",
    "    # would then never be substituted.\n",
    "    run_root = 'gs://' + BUCKET + '/{{workflow.uid}}'\n",
    "\n",
    "    preprocess_task = preprocess_operation(\n",
    "        input_1_uri='gs://kubeflow-examples-data/named_entity_recognition_dataset/ner.csv',\n",
    "        output_y_uri_template=run_root + '/preprocess/y/data',\n",
    "        output_x_uri_template=run_root + '/preprocess/x/data',\n",
    "        # Same location as output_model_uri_template of the training step.\n",
    "        output_preprocessing_state_uri_template=run_root + '/model'\n",
    "    ).apply(kfp.gcp.use_gcp_secret('user-gcp-sa'))\n",
    "\n",
    "    train_task = train_operation(\n",
    "        input_x_uri=preprocess_task.outputs['output-x-uri'],\n",
    "        input_y_uri=preprocess_task.outputs['output-y-uri'],\n",
    "        input_job_dir_uri=run_root + '/job',\n",
    "        input_tags=preprocess_task.outputs['output-tags'],\n",
    "        input_words=preprocess_task.outputs['output-words'],\n",
    "        input_dropout=0.1,\n",
    "        output_model_uri_template=run_root + '/model'\n",
    "    ).apply(kfp.gcp.use_gcp_secret('user-gcp-sa'))\n",
    "\n",
    "    deploy_task = ai_platform_deploy_operation(\n",
    "        model_path=train_task.output,\n",
    "        model_name=\"named_entity_recognition_kubeflow\",\n",
    "        model_region=\"us-central1\",\n",
    "        model_version=\"version1\",\n",
    "        model_runtime_version=\"1.13\",\n",
    "        model_prediction_class=\"model_prediction.CustomModelPrediction\",\n",
    "        model_python_version=\"3.5\",\n",
    "        model_package_uris=\"gs://{}/routine/custom_prediction_routine-0.2.tar.gz\".format(BUCKET)\n",
    "    ).apply(kfp.gcp.use_gcp_secret('user-gcp-sa'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Compile the Pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "pipeline_func = pipeline\n",
    "# The package is named after the pipeline function: 'pipeline.pipeline.zip'.\n",
    "pipeline_filename = pipeline_func.__name__ + '.pipeline.zip'\n",
    "\n",
    "# `compiler` is the kfp module already imported in the Imports cell, so it\n",
    "# is not re-imported here.\n",
    "compiler.Compiler().compile(pipeline_func, pipeline_filename)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create a Kubeflow Experiment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'created_at': datetime.datetime(2019, 7, 5, 10, 32, 13, tzinfo=tzlocal()),\n",
      " 'description': None,\n",
      " 'id': '84e88563-7774-4bae-aa33-4a67649c136a',\n",
      " 'name': 'named-entity-recognition'}\n"
     ]
    }
   ],
   "source": [
    "client = kfp.Client()\n",
    "\n",
    "# Reuse the experiment named EXPERIMENT_NAME when the lookup succeeds;\n",
    "# otherwise create it.\n",
    "try:\n",
    "    experiment = client.get_experiment(experiment_name=EXPERIMENT_NAME)\n",
    "except Exception:\n",
    "    # Any lookup failure falls back to creating the experiment. Catching\n",
    "    # Exception instead of using a bare `except:` lets KeyboardInterrupt\n",
    "    # and SystemExit propagate, so the cell can still be interrupted.\n",
    "    experiment = client.create_experiment(EXPERIMENT_NAME)\n",
    "\n",
    "print(experiment)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Run the Pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "Run link <a href=\"/pipeline/#/runs/details/705a2bc2-9f1c-11e9-9120-42010a800045\" target=\"_blank\" >here</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "84e88563-7774-4bae-aa33-4a67649c136a\n",
      "pipeline run\n",
      "pipeline.pipeline.zip\n",
      "{}\n"
     ]
    }
   ],
   "source": [
    "# The pipeline function takes no parameters, so no arguments are passed.\n",
    "arguments = {}\n",
    "run_name = pipeline_func.__name__ + ' run'\n",
    "\n",
    "# Submit the compiled package as a run of the experiment.\n",
    "run_result = client.run_pipeline(\n",
    "    experiment.id, run_name, pipeline_filename, arguments)\n",
    "\n",
    "# Echo what was submitted, one value per line.\n",
    "for submitted_value in (experiment.id, run_name, pipeline_filename, arguments):\n",
    "    print(submitted_value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
